#Load Libraries

library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

#Load Files

# Read the tab-separated 23andMe export into a data frame; the first line of
# the file supplies the column names.
SNPs <- read.table(
  file = "23andMe_complete.txt",
  sep = "\t",
  header = TRUE
)

#Adjust Figure Size

# Turn chromosome into an ordered factor so it sorts 1..22, X, Y, MT instead
# of lexicographically.
SNPs$chromosome <- factor(
  SNPs$chromosome,
  levels = c(1:22, "X", "Y", "MT"),
  ordered = TRUE
)

# Bar chart of SNP counts per genotype, stacked by chromosome and drawn in
# polar coordinates.
ggplot(SNPs) +
  geom_bar(aes(x = genotype, fill = chromosome)) +
  coord_polar() +
  labs(
    title = "Total SNPs for each genotype",
    x = "Genotype",
    y = "Total number of SNPs"
  )

#Plot graph to a pdf outputfile

# Write the per-chromosome genotype bar chart to a 6x3 inch PDF.
pdf("SNP_example_plot.pdf", width = 6, height = 3)
# print() is explicit because ggplot objects are only auto-printed at the
# interactive top level; without it the file is empty when the script is
# run through source().
print(
  ggplot(data = SNPs) +
    geom_bar(mapping = aes(x = chromosome, fill = genotype))
)
# Close the device so the PDF is flushed to disk.
dev.off()
## quartz_off_screen 
##                 2

#Plot graph to a png outputfile

# Write the per-chromosome genotype bar chart to a 6x6 inch PNG.
# ppi is the resolution in pixels per inch; width and height are given in
# pixels, hence the multiplication.
ppi <- 300
png("SNP_example_plot.png", width = 6 * ppi, height = 6 * ppi, res = ppi)
# print() is explicit because ggplot objects are only auto-printed at the
# interactive top level; without it the file is empty when the script is
# run through source().
print(
  ggplot(data = SNPs) +
    geom_bar(mapping = aes(x = chromosome, fill = genotype))
)
# dev.off must be *called*: a bare `dev.off` only prints the function and
# leaves the PNG device open, so the file is never finalised and later plots
# keep being drawn into it.
dev.off()

#Rmarkdown loading images: Genotype counts per chromosome
#Alternative way using html: Genotype counts per chromosome
#Interactive graphs and tables in RMarkdown reports
#Version 1

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Scatter plot of sepal length against sepal width from the built-in iris
# data, coloured by species; stored in p and then converted with ggplotly().
p <- ggplot(iris, aes(Sepal.Length, Sepal.Width, colour = Species)) +
  geom_point()
ggplotly(p)

#Version 2

library(plotly)
# Build the iris scatter plot inline and pass it straight to ggplotly(),
# without storing the intermediate ggplot object.
ggplotly(
  p = ggplot(iris, aes(Sepal.Length, Sepal.Width, colour = Species)) +
    geom_point()
)

#InteractiveTable

library(DT)
# Show the built-in iris data set through DT's datatable().
datatable(data = iris)

#Exercise1

# Reload the data so the exercises start from the raw file.
SNPs <- read.table("23andMe_complete.txt", header = TRUE, sep = "\t")
# Reading the file again discards any earlier factor ordering, so restore it
# here; otherwise the chromosomes are plotted lexicographically (1, 10, 11, ...).
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(seq(1, 22), "X", "Y", "MT")
)

# Bar chart of the number of SNPs on each chromosome, all bars in blue.
p <- ggplot(SNPs, aes(chromosome)) +
  geom_bar(fill = "blue") +
  ggtitle("Total SNPs for Each Chromosome") +
  ylab("SNP count") +
  xlab("Chromosome")
p

#Exercise2

# Fill colour for every genotype code, assembled one colour group at a time:
# two-letter base codes are blue, single-letter base codes pink, the D/I codes
# orange and "--" green.
mycolor <- c(
  setNames(
    rep("blue", 10),
    c("AA", "AC", "AG", "AT", "CC", "CG", "CT", "GG", "GT", "TT")
  ),
  setNames(rep("pink", 4), c("A", "C", "G", "T")),
  setNames(rep("orange", 5), c("D", "DD", "DI", "I", "II")),
  "--" = "green"
)

# Stacked bar chart of SNP counts per chromosome, filled by genotype with the
# manual palette above and a black outline around each segment.
ggplot(SNPs, aes(x = chromosome, fill = genotype)) +
  geom_bar(colour = "black") +
  scale_fill_manual(values = mycolor) +
  labs(
    title = "Total SNPs count for each chromosome",
    x = "Type of Chromosome",
    y = "SNPs count"
  )

#Exercise3

# Write a dodged (side-by-side) genotype bar chart per chromosome to a
# 6x6 inch PNG. ppi is the resolution in pixels per inch; width and height
# are given in pixels.
ppi <- 300
png("Lab3_Exercise5_plot.png", width = 6 * ppi, height = 6 * ppi, res = ppi)
# print() is explicit because ggplot objects are only auto-printed at the
# interactive top level; without it the file is empty when the script is
# run through source().
print(
  ggplot(data = SNPs, aes(chromosome, fill = genotype)) +
    geom_bar(position = "dodge")
)
# Close the device so the PNG is flushed to disk.
dev.off()
## quartz_off_screen 
##                 2

#Genotype counts per chromosome
#Exercise4

# Order chromosomes 1..22, X, Y, MT rather than lexicographically.
SNPs$chromosome <- ordered(
  SNPs$chromosome,
  levels = c(seq(1, 22), "X", "Y", "MT")
)

# One facet per chromosome, with dodged bars per genotype; scales = "free"
# gives every facet its own axes.
# NOTE(review): geom_bar() plots raw row counts, but the y label says
# "Thousands" -- confirm whether the label or the scale is intended.
ggplot(SNPs, aes(chromosome, fill = genotype)) +
  geom_bar(position = "dodge") +
  facet_wrap(~chromosome, scales = "free") +
  ggtitle("SNP Count for Each Type of Chromosome") +
  ylab("SNP Count(Thousands)") +
  xlab("Type of Chromosome")

#Exercise5

library(plotly)
# Order chromosomes 1..22, X, Y, MT rather than lexicographically.
SNPs$chromosome <- factor(
  SNPs$chromosome,
  levels = c(1:22, "X", "Y", "MT"),
  ordered = TRUE
)

# Dodged per-chromosome bar chart, one facet per genotype laid out in two
# columns, passed to ggplotly().
ggplotly(
  p = ggplot(SNPs, aes(x = chromosome, fill = genotype)) +
    geom_bar(position = "dodge") +
    facet_wrap(~genotype, ncol = 2)
)

#Exercise 6

library(DT)
# Keep only the rows on chromosome Y. which() drops rows where the comparison
# is NA, the same way subset() does.
chromosome_subset <- SNPs[which(SNPs$chromosome == "Y"), , drop = FALSE]
# Show the subset through DT's datatable().
datatable(data = chromosome_subset)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html